#!/usr/bin/env bash
# Staged training launcher.
#
# Defines the default settings below, sources utils/parse_options.sh, and then
# runs every stage whose index is >= $stage.

# Index of the first stage to run: a stage N is executed only when stage <= N.
stage=0

# ---------------------------------------------------------------------------
# Dataset selection.
# train_data / valid_data are comma-separated lists of JSONL manifests; they
# are forwarded to launch_distributed.sh as --DATA_PATH / --VALID_DATA_PATH.
# Only the uncommented assignments are active; the commented-out lines are
# earlier configurations kept for reference.
# ---------------------------------------------------------------------------

#train_data=/apdcephfs_qy3/share_976139/users/joyounglv/audiollama/data/aishell1/train_asr_aishell1.json

#train_data="audio_data/train_20240322_cn_slides_1100h.jsonl,audio_data/train_20231206_slidespeech_487h.jsonl,audio_data/train_sub_asr_zhen_train_20240322_cn_slides_asr_multi.jsonl"
#valid_data="audio_data/valid_20240322_cn_slides_1100h.jsonl,audio_data/test_20241119_slidespeech.jsonl,audio_data/valid_20240322_cn_slides_1100h_asr_multi.jsonl"

# train_data="audio_data/train_20240322_cn_slides_1100h.jsonl,audio_data/train_20231206_slidespeech_487h.jsonl"
# Active training manifests.
train_data="penguins_data/train_20250318_cn_slides_1100h.jsonl,penguins_data/train_20250318_slidespeech_487h.jsonl"

# Active validation manifests.
valid_data="penguins_data/valid_20250330_cn_slides_1100h.jsonl,penguins_data/test_20250330_slidespeech.jsonl"
# valid_data="audio_data/valid_20240322_cn_slides_1100h.jsonl,audio_data/test_20241119_slidespeech.jsonl"

#train_data="audio_data/train_20231206_slidespeech_487h.jsonl"
#valid_data="audio_data/test_20241119_slidespeech.jsonl"

#train_data=audio_data/train_aishell1.jsonl
#valid_data=audio_data/dev_aishell1.jsonl
#train_data="audio_data/train_sub_asr_zhen_train_20240322_cn_slides_asr_multi.jsonl"
#valid_data="audio_data/valid_20240322_cn_slides_1100h_asr_multi.jsonl"
# Uncommenting the next line would train on the validation manifests
# (presumably a quick smoke-test setup -- confirm before relying on it).
#train_data=$valid_data

# Name forwarded to launch_distributed.sh as --SAVE_PREFIX.
# save_prefix=audiollm_asr_whisper_tiny_qwen0.5b_zhen_use_specaug
save_prefix=penguins_asr_PG_Tokens_250330_conformer_pre_proj_qwen0.5b_zhen
# save_prefix=debug

# NOTE(review): presumably a Kaldi-style option parser that lets command-line
# flags such as --stage / --train_data override the defaults above; if so, it
# must be sourced after those defaults are assigned -- confirm against
# utils/parse_options.sh.
. utils/parse_options.sh







# Stage 0: launch distributed training.
# Runs when stage <= 0. The dataset lists and the save prefix are forwarded
# to launch_distributed.sh together with the training script to execute.
# Expansions are quoted so that an empty or space-containing value cannot
# shift the argument list.
if [ "$stage" -le 0 ]; then
    echo "Stage 0: Training Model"
    # Alternative entry point kept for reference:
    #   --SCRIPT scripts/pretrain_audiollm.sh
    bash launch_distributed.sh \
        --DATA_PATH "$train_data" \
        --VALID_DATA_PATH "$valid_data" \
        --SAVE_PREFIX "$save_prefix" \
        --SCRIPT scripts/train_shallow_penguinsllm.sh || {
        # Propagate the launcher's failure instead of falling through and
        # reporting success for a training run that did not complete.
        rc=$?
        echo "Stage 0: training failed with exit status $rc" >&2
        exit "$rc"
    }
fi

# The script stops here unconditionally: everything below this line is
# unreachable, so stage 1 is effectively disabled (even with stage=1).
# Remove this exit to re-enable it.
exit 0

# Stage 1 (currently disabled by the exit above): runs scripts/empty.sh
# when stage <= 1.
if [ "$stage" -le 1 ]; then
    echo "Stage 1: Run EMPTY"
    bash scripts/empty.sh
fi
